{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 优化ASR(自动语音识别)机器人"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "# Baidu speech credentials: taken from the environment, or prompted for,\n",
    "# so that secrets are never stored in the notebook itself.\n",
    "API_KEY = os.environ.get('BAIDU_API_KEY') or getpass('Baidu API key: ')\n",
    "SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY') or getpass('Baidu secret key: ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'24.51dc5c89143333b39bb0b421b2cfd8fd.2592000.1655984275.282335-25345323'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from baidu_speech import fetch_token, asr\n",
    "\n",
    "# Smoke-test the credentials. Only report success: the access token is a live\n",
    "# credential and should not be echoed into the saved notebook output.\n",
    "bool(fetch_token(API_KEY, SECRET_KEY))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'24.51dc5c89143333b39bb0b421b2cfd8fd.2592000.1655984275.282335-25345323'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xu_token = fetch_token(API_KEY, SECRET_KEY)\n",
    "# Confirm a token was obtained without writing its value to the cell output.\n",
    "'token acquired (%d chars)' % len(xu_token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import speech_recognition\n",
    "\n",
    "# Listen on the microphone until the recognizer returns one audio clip.\n",
    "recognizer = speech_recognition.Recognizer()\n",
    "with speech_recognition.Microphone() as mic:\n",
    "    audio = recognizer.listen(mic)\n",
    "\n",
    "# Save the clip as WAV data (convert_rate=16000) in 1.wav.\n",
    "wav_bytes = audio.get_wav_data(convert_rate=16000)\n",
    "with open(\"1.wav\", \"wb\") as wav_file:\n",
    "    wav_file.write(wav_bytes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Request time cost 1.254773\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'{\"corpus_no\":\"7101265801583462608\",\"err_msg\":\"success.\",\"err_no\":0,\"result\":[\"和个性化数据体验的这个体验好，那么就登录了我这边的话，我就来登陆一下嗯。\"],\"sn\":\"357475567191653392287\"}\\n'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Recognize the recorded clip; the cell displays the raw response text.\n",
    "asr(token=xu_token, AUDIO_FILE=\"1.wav\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 连接图灵机器人"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests,json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "# Turing robot API key: taken from the environment, or prompted for,\n",
    "# so that the secret is never stored in the notebook itself.\n",
    "TL_KEY = os.environ.get('TURING_API_KEY') or getpass('Turing API key: ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'http://openapi.turingapi.com/openapi/api/v2'\n",
    "# Sample request payload for the Turing robot endpoint.\n",
    "data = {\n",
    "    \"reqType\": 0,\n",
    "    \"perception\": {\n",
    "        \"inputText\": {\n",
    "            \"text\": \"你好，机器人\"\n",
    "        },\n",
    "        \"inputImage\": {\n",
    "            \"url\": \"imageUrl\"\n",
    "        },\n",
    "        \"selfInfo\": {\n",
    "            \"location\": {\n",
    "                \"city\": \"北京\",\n",
    "                \"province\": \"北京\",\n",
    "                \"street\": \"信息路\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    \"userInfo\": {\n",
    "        # Reuse the key defined in the TL_KEY cell instead of repeating the literal.\n",
    "        \"apiKey\": TL_KEY,\n",
    "        \"userId\": \"000001\"\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'emotion': {'robotEmotion': {'a': 0, 'd': 0, 'emotionId': 0, 'p': 0}, 'userEmotion': {'a': 0, 'd': 0, 'emotionId': 0, 'p': 0}}, 'intent': {'actionName': '', 'code': 10004, 'intentName': ''}, 'results': [{'groupType': 1, 'resultType': 'text', 'values': {'text': '我是机器人，我为机器人代言'}}]}\n"
     ]
    }
   ],
   "source": [
    "req = json.dumps(data).encode('utf8')\n",
    "# The timeout keeps the cell from hanging indefinitely; raise_for_status\n",
    "# surfaces HTTP errors here instead of failing later while decoding the body.\n",
    "resp = requests.post(url, req, timeout=10)\n",
    "resp.raise_for_status()\n",
    "r = resp.json()\n",
    "print(r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def TL_API(TL_KEY,text):\n",
    "    \"\"\"Send `text` to the Turing robot endpoint and return the text of its first result.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    TL_KEY : API key placed in the request's ``userInfo.apiKey`` field.\n",
    "    text : input text placed in ``perception.inputText.text``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The ``['results'][0]['values']['text']`` entry of the JSON response.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.HTTPError\n",
    "        If the server answers with an HTTP error status.\n",
    "    \"\"\"\n",
    "    TL_url = 'http://openapi.turingapi.com/openapi/api/v2'\n",
    "    payload = {\n",
    "        \"reqType\": 0,\n",
    "        \"perception\": {\n",
    "            \"inputText\": {\n",
    "                \"text\": text\n",
    "            },\n",
    "        },\n",
    "        \"userInfo\": {\n",
    "            # Use the parameter; the previous body referenced the undefined name TL_key.\n",
    "            \"apiKey\": TL_KEY,\n",
    "            \"userId\": \"0000001\"\n",
    "        }\n",
    "    }\n",
    "    # Serialize once; the timeout prevents an unresponsive server from blocking forever.\n",
    "    res = requests.post(TL_url, data=json.dumps(payload), timeout=10)\n",
    "    res.raise_for_status()\n",
    "    return res.json()['results'][0]['values']['text']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'res' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-22-e0cbf203d53e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'results'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'values'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'text'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m: name 'res' is not defined"
     ]
    }
   ],
   "source": [
    "# `res` exists only inside TL_API, so it cannot be used here. Read the reply\n",
    "# text from `r`, the already-parsed response of the request cell above.\n",
    "r['results'][0]['values']['text']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> 1. 准备音频（临时文件-时间期限）---音频文件\n",
    "> 2. 调用百度语音识别---文本信息（识别到的内容）\n",
    "> 3. 调用图灵机器人---文本信息（智能返回的内容）\n",
    "> 4. (待做).文本信息--> 语音合成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'pydub'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-23-9a40c809e6f4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# 1. 准备音频文件\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mspeech_recognition\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpydub\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mAudioSegment\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpydub\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mplayback\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mplay\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pydub'"
     ]
    }
   ],
   "source": [
    "# 1. Record the audio file\n",
    "import json\n",
    "import speech_recognition\n",
    "from pydub import AudioSegment\n",
    "from pydub.playback import play\n",
    "\n",
    "recognizer = speech_recognition.Recognizer()\n",
    "with speech_recognition.Microphone() as source:\n",
    "    audio = recognizer.listen(source)\n",
    "# Save the recording to a wav file\n",
    "with open(\"1.wav\", \"wb\") as f:\n",
    "    f.write(audio.get_wav_data(convert_rate=16000))\n",
    "\n",
    "# 2. Baidu speech recognition\n",
    "xu_token = fetch_token(API_KEY, SECRET_KEY)\n",
    "# asr() returns the response as JSON text: parse it with json.loads, never eval(),\n",
    "# since the text comes from a remote server.\n",
    "asr_result = json.loads(asr(token=xu_token, AUDIO_FILE='1.wav'))['result'][0]\n",
    "\n",
    "# 3. Turing robot reply (requested once and reused below)\n",
    "reply_text = TL_API(TL_KEY, asr_result)\n",
    "\n",
    "# 4. Speech synthesis and playback\n",
    "# NOTE(review): tts is not imported by the baidu_speech import cell; it is defined in\n",
    "# the last cell of this notebook, which must be run before this one.\n",
    "tts(xu_token, reply_text)\n",
    "song = AudioSegment.from_wav('result.wav')\n",
    "play(song)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "1. 获取token\n",
    "2. asr语音识别技术\n",
    "3. tts语音合成技术\n",
    "\n",
    "'''\n",
    "import sys\n",
    "import json\n",
    "import time\n",
    "\n",
    "from urllib.request import urlopen\n",
    "from urllib.request import Request\n",
    "from urllib.error import URLError\n",
    "from urllib.parse import urlencode\n",
    "from urllib.parse import quote_plus\n",
    "timer = time.perf_counter\n",
    "\n",
    "\n",
    "class DemoError(Exception):\n",
    "    \"\"\"Raised by fetch_token and asr when a response or input file is unusable.\"\"\"\n",
    "\n",
    "\n",
    "# Scope the token must carry; a falsy value makes fetch_token skip the check.\n",
    "SCOPE = 'audio_voice_assistant_get'\n",
    "# HTTPS, so that the client secret is not sent in clear text.\n",
    "TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'\n",
    "ASR_URL = 'http://vop.baidu.com/server_api'\n",
    "\n",
    "# 1. Fetch an access token\n",
    "def fetch_token(API_KEY,SECRET_KEY):\n",
    "    \"\"\"Request an access token from TOKEN_URL with the client-credentials grant.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    API_KEY : sent as ``client_id``.\n",
    "    SECRET_KEY : sent as ``client_secret``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The ``access_token`` field of the JSON response.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    DemoError\n",
    "        If the response lacks ``access_token`` or ``scope``, or if SCOPE is set\n",
    "        and is not among the granted scopes.\n",
    "    URLError\n",
    "        If the request fails without an HTTP response (e.g. no connection).\n",
    "    \"\"\"\n",
    "    from urllib.error import HTTPError\n",
    "\n",
    "    params = {\n",
    "        'grant_type': 'client_credentials',\n",
    "        'client_id': API_KEY,\n",
    "        'client_secret': SECRET_KEY\n",
    "    }\n",
    "    post_data = urlencode(params).encode('utf-8')\n",
    "    req = Request(TOKEN_URL, post_data)\n",
    "    try:\n",
    "        # The context manager closes the connection once the body is read.\n",
    "        with urlopen(req) as f:\n",
    "            result_str = f.read()\n",
    "    except HTTPError as err:\n",
    "        # Only HTTPError carries a status code and a body; a plain URLError has\n",
    "        # neither, so it is left to propagate instead of being handled here.\n",
    "        print('token http response http code : ' + str(err.code))\n",
    "        result_str = err.read()\n",
    "    result = json.loads(result_str.decode())\n",
    "    if 'access_token' in result and 'scope' in result:\n",
    "        # A falsy SCOPE skips the scope check.\n",
    "        if SCOPE and SCOPE not in result['scope'].split(' '):\n",
    "            raise DemoError('scope is not correct')\n",
    "        return result['access_token']\n",
    "    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')\n",
    "\n",
    "# 2. Speech recognition\n",
    "def asr(token,AUDIO_FILE,CUID='123456PYTHON',DEV_PID=1537,RATE=16000):\n",
    "    \"\"\"Upload an audio file to ASR_URL and return the raw response text.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    token : access token, sent as the ``token`` query parameter.\n",
    "    AUDIO_FILE : path of the audio file; its last three characters are used as\n",
    "        the audio format in the Content-Type header.\n",
    "    CUID, DEV_PID : sent as the ``cuid`` and ``dev_pid`` query parameters.\n",
    "    RATE : sample rate written into the Content-Type header.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The response body decoded as UTF-8; it is also written to ``result.txt``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    DemoError\n",
    "        If the audio file is empty.\n",
    "    URLError\n",
    "        If the request fails without an HTTP response (e.g. no connection).\n",
    "    \"\"\"\n",
    "    from urllib.error import HTTPError\n",
    "\n",
    "    with open(AUDIO_FILE, 'rb') as speech_file:\n",
    "        speech_data = speech_file.read()\n",
    "    length = len(speech_data)\n",
    "    if length == 0:\n",
    "        raise DemoError('file %s length read 0 bytes' % AUDIO_FILE)\n",
    "    params = {'cuid': CUID, 'token': token, 'dev_pid': DEV_PID}\n",
    "    headers = {\n",
    "        'Content-Type': 'audio/' + AUDIO_FILE[-3:] + '; rate=' + str(RATE),\n",
    "        'Content-Length': length\n",
    "    }\n",
    "    req = Request(ASR_URL + \"?\" + urlencode(params), speech_data, headers)\n",
    "    try:\n",
    "        begin = timer()\n",
    "        # The context manager closes the connection once the body is read.\n",
    "        with urlopen(req) as f:\n",
    "            result_str = f.read()\n",
    "            print(\"Request time cost %f\" % (timer() - begin))\n",
    "    except HTTPError as err:\n",
    "        # Only HTTPError carries a status code and a body; a plain URLError has\n",
    "        # neither, so it is left to propagate instead of being handled here.\n",
    "        print('asr http response http code : ' + str(err.code))\n",
    "        result_str = err.read()\n",
    "    result_str = str(result_str, 'utf-8')\n",
    "    # Explicit UTF-8: the recognized text is non-ASCII and the platform default\n",
    "    # encoding may not be able to represent it.\n",
    "    with open(\"result.txt\", \"w\", encoding=\"utf-8\") as of:\n",
    "        of.write(result_str)\n",
    "    return result_str\n",
    "\n",
    "\n",
    "# 3. Speech synthesis\n",
    "\n",
    "TTS_URL = 'http://tsn.baidu.com/text2audio'\n",
    "\n",
    "def tts(token,TEXT,CUID='123456PYTHON',PER=4,SPD=5,PIT=5,VOL=5,AUE=6):\n",
    "    \"\"\"Synthesize TEXT through TTS_URL and save the audio to ``result.<format>``.\n",
    "\n",
    "    The token needs the 'audio_tts_post' scope; enable it on the web console\n",
    "    if it is missing.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    token : access token, sent as ``tok``.\n",
    "    TEXT : text to synthesize.\n",
    "    CUID : sent as ``cuid``.\n",
    "    PER : voice. Basic voices: 0 Du Xiaomei, 1 Du Xiaoyu, 3 Du Xiaoyao, 4 Du Yaya.\n",
    "        Premium voices: 5 Du Xiaojiao, 103 Du Miduo, 106 Du Bowen,\n",
    "        110 Du Xiaotong, 111 Du Xiaomeng.\n",
    "    SPD : speed, 0-15 (5 is medium).\n",
    "    PIT : pitch, 0-15 (5 is medium).\n",
    "    VOL : volume, 0-9 (5 is medium).\n",
    "    AUE : output format: 3 mp3, 4 pcm-16k, 5 pcm-8k, 6 wav.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None. The audio is written to ``result.<format>``; if the response is not\n",
    "    audio, its body is written to ``error.txt`` and printed instead.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If AUE is not one of 3, 4, 5, 6.\n",
    "    URLError\n",
    "        If the request fails without an HTTP response (e.g. no connection).\n",
    "    \"\"\"\n",
    "    from urllib.error import HTTPError\n",
    "\n",
    "    FORMATS = {3: \"mp3\", 4: \"pcm\", 5: \"pcm\", 6: \"wav\"}\n",
    "    FORMAT = FORMATS[AUE]\n",
    "    # TEXT must be URL-encoded twice: quote_plus here, urlencode below.\n",
    "    tex = quote_plus(TEXT)\n",
    "    # lan and ctp are fixed parameters.\n",
    "    params = {'tok': token, 'tex': tex, 'per': PER, 'spd': SPD, 'pit': PIT, 'vol': VOL, 'aue': AUE, 'cuid': CUID,\n",
    "              'lan': 'zh', 'ctp': 1}\n",
    "\n",
    "    data = urlencode(params)\n",
    "    req = Request(TTS_URL, data.encode('utf-8'))\n",
    "    try:\n",
    "        # The context manager closes the connection once the body is read.\n",
    "        with urlopen(req) as f:\n",
    "            result_str = f.read()\n",
    "            content_type = f.headers.get('Content-Type', '')\n",
    "        # Anything that is not audio is treated as an error payload.\n",
    "        has_error = 'audio/' not in content_type\n",
    "    except HTTPError as err:\n",
    "        # Only HTTPError carries a status code and a body; a plain URLError has\n",
    "        # neither, so it is left to propagate instead of being handled here.\n",
    "        print('tts http response http code : ' + str(err.code))\n",
    "        result_str = err.read()\n",
    "        has_error = True\n",
    "\n",
    "    save_file = \"error.txt\" if has_error else 'result.' + FORMAT\n",
    "    with open(save_file, 'wb') as of:\n",
    "        of.write(result_str)\n",
    "\n",
    "    if has_error:\n",
    "        print(\"tts api  error:\" + str(result_str, 'utf-8'))\n",
    "\n",
    "    print(\"result saved as :\" + save_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
